{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import os\n",
    "import codecs\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>合晟资产是一家专注于股票、债券等二级市场投资，为合格投资者提供专业资产管理服务的企业。公司业...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>公司的主营业务为向中小微企业、个体工商户、农户等客户提供贷款服务，自设立以来主营业务未发生过变化。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>公司立足于商业地产服务，致力于为商业地产开发、销售、运营全产业链提供一整套增值服务，业务覆盖...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>公司经工商管理部门核准的经营范围为“投资咨询、经济信息咨询，企业管理咨询，品牌推广策划，公共...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>该公司的主营业务为在中国境内(港、澳、台除外)开展保险代理销售，依托于自身的产品研究能力和专...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   label                                               text\n",
       "0      2  合晟资产是一家专注于股票、债券等二级市场投资，为合格投资者提供专业资产管理服务的企业。公司业...\n",
       "1      2  公司的主营业务为向中小微企业、个体工商户、农户等客户提供贷款服务，自设立以来主营业务未发生过变化。\n",
       "2      1  公司立足于商业地产服务，致力于为商业地产开发、销售、运营全产业链提供一整套增值服务，业务覆盖...\n",
       "3      2  公司经工商管理部门核准的经营范围为“投资咨询、经济信息咨询，企业管理咨询，品牌推广策划，公共...\n",
       "4      2  该公司的主营业务为在中国境内(港、澳、台除外)开展保险代理销售，依托于自身的产品研究能力和专..."
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Directory that holds the input CSV files (relative path).\n",
    "dpath = './data/'\n",
    "\n",
    "# header=None makes pandas treat the first line of the file as data rather\n",
    "# than as column names, so the two columns are named explicitly afterwards.\n",
    "train_path = os.path.join(dpath, 'training.csv')\n",
    "train = pd.read_csv(train_path, encoding='utf8', header=None)\n",
    "train.columns = ['label', 'text']\n",
    "\n",
    "# Preview the first five rows.\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEJCAYAAABlmAtYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAVBUlEQVR4nO3df7DddX3n8eeLoAgoBZoLSxNqsJtVgVHUTBZ1x2WNs0SlBH/QCR3azMIu1lILTncraWfWdjvZtT+2K3ULLRUEqwubgjZZOv5g0lJ3R5S9CNaEyJIKQkpMLlIL1Rk0+N4/zpfxeL3kc7m553tuuM/HzJnzPZ/z+X7fnxMu93W/vz4nVYUkSQdy2LgHIEla+AwLSVKTYSFJajIsJElNhoUkqenwcQ9gVJYuXVorVqwY9zAk6ZBy1113PVpVE9Pbn7NhsWLFCiYnJ8c9DEk6pCT5+kztHoaSJDUZFpKkJsNCktRkWEiSmgwLSVKTYSFJajIsJElNhoUkqcmwkCQ1PWfv4F7sbr3uzSPd/jkXfWrG9v/2P84ead33/uxnRrp9STNzz0KS1GRYSJKaDAtJUpNhIUlqMiwkSU2GhSSpybCQJDWNLCySXJdkX5LtQ22/m+SrSf4mySeTHDv03sYku5Lcl+TsofbXJPlK994fJMmoxixJmtko9yyuB9ZOa7sNOL2qXgH8P2AjQJJTgfXAad06VyVZ0q1zNXAJsLJ7TN+mJGnERhYWVfU54LFpbZ+tqv3dyy8Ay7vldcBNVfVkVT0A7AJWJzkJOKaq7qiqAj4KnDeqMUuSZjbOcxYXAU/PGbEMeHjovd1d27JueXr7jJJckmQyyeTU1NQ8D1eSFq+xhEWSXwf2Ax9/ummGbnWA9hlV1TVVtaqqVk1MTBz8QCVJwBgmEkyyATgHWNMdWoLBHsPJQ92WA4907ctnaJck9ajXPYska4H3AedW1XeG3toKrE9yRJJTGJzIvrOq9gBPJDmzuwrq54EtfY5ZkjTCPYskNwJnAUuT7Abez+DqpyOA27orYL9QVb9QVTuSbAbuZXB46tKqeqrb1LsZXFl1JINzHDPPjS1JGpmRhUVVXTBD87UH6L8J2DRD+yRw+jwOTZL0LHkHtySpybCQJDUZFpKkJsNCktRkWEiSmgwLSVKTYSFJajIsJElNhoUkqcmwkCQ1GRaSpCbDQpLUZFhIkpoMC0lSk2EhSWoyLCRJTYaFJKnJsJAkNRkWkqQmw0KS1GRYSJKaDAtJUpNhIUlqMiwkSU0jC4sk1yXZl2T7UNvxSW5Lcn/3fNzQexuT7EpyX5Kzh9pfk+Qr3Xt/kCSjGrMkaWaj3LO4Hlg7re0KYFtVrQS2da9JciqwHjitW+eqJEu6da4GLgFWdo/p25QkjdjIwqKqPgc8Nq15HXBDt3wDcN5Q+01V9WRVPQDsAlYnOQk4pqruqKoCPjq0jiSpJ32fszixqvYAdM8ndO3LgIeH+u3u2pZ1y9PbZ5TkkiSTSSanpqbmdeCStJgtlBPcM52HqAO0z6iqrqmqVVW1amJiYt4GJ0mLXd9hsbc7tET3vK9r3w2cPNRvOfBI1758hnZJUo/6DoutwIZueQOwZah9fZIjkpzC4ET2nd2hqieSnNldBfXzQ+tIknpy+Kg2nORG4CxgaZLdwPuBDwCbk1wMPAScD1BVO5JsBu4F9gOXVtVT3abezeDKqiOBT3UPSVKPRhYWVXXBM7y15hn6bwI2zdA+CZw+j0OTJD1LC+UEtyRpATMsJElNhoUkqcmwkCQ1GRaSpCbDQpLUZFhIkpoMC0lSk2EhSWoyLCRJTYaFJKnJsJAkNRkWkqQmw0KS1GRYSJKaDAtJUpNhIUlqMiwkSU2GhSSpybCQJDUZFpKkJsNCktRkWEiSmsYSFknem2RHku1JbkzygiTHJ7ktyf3d83FD/Tcm2ZXkviRnj2PMkrSY9R4WSZYB
vwysqqrTgSXAeuAKYFtVrQS2da9Jcmr3/mnAWuCqJEv6HrckLWaHj7HukUm+BxwFPAJsBM7q3r8BuB14H7AOuKmqngQeSLILWA3c0fOYpRm99ZY/Hun2/+Id7xrp9qXZ6H3Poqr+Dvg94CFgD/APVfVZ4MSq2tP12QOc0K2yDHh4aBO7u7YfkeSSJJNJJqempkb1ESRp0RnHYajjGOwtnAL8BHB0kgsPtMoMbTVTx6q6pqpWVdWqiYmJgx+sJAkYzwnuNwEPVNVUVX0P+ATwOmBvkpMAuud9Xf/dwMlD6y9ncNhKktSTcYTFQ8CZSY5KEmANsBPYCmzo+mwAtnTLW4H1SY5IcgqwEriz5zFL0qLW+wnuqvpikpuBLwH7gbuBa4AXApuTXMwgUM7v+u9Ishm4t+t/aVU91fe4JWkxG8vVUFX1fuD905qfZLCXMVP/TcCmUY9LkjQz7+CWJDUZFpKkJsNCktRkWEiSmgwLSVLTrMIiybbZtEmSnpsOeOlskhcwmOhvaTdNx9NTbxzDYKoOSdIi0LrP4l3A5QyC4S5+EBaPA384wnFJkhaQA4ZFVV0JXJnkPVX1oZ7GJElaYGZ1B3dVfSjJ64AVw+tU1UdHNC5J0gIyq7BI8qfATwH3AE/Py1SAYSFJi8Bs54ZaBZxaVTN+j4Qk6blttvdZbAf+ySgHIklauGa7Z7EUuDfJnQxmhwWgqs4dyagkSQvKbMPiN0Y5CEnSwjbbq6H+etQDkSQtXLO9GuoJBlc/ATwfeB7w7ao6ZlQDkyQtHLPds3jR8Osk5wGrRzIiSdKCM6dZZ6vqz4E3zvNYJEkL1GwPQ7196OVhDO678J4LSVokZns11E8PLe8HHgTWzftoJEkL0mzPWfybUQ9EkrRwzfbLj5Yn+WSSfUn2JrklyfJRD06StDDM9gT3R4CtDL7XYhnwv7o2SdIiMNuwmKiqj1TV/u5xPTAx16JJjk1yc5KvJtmZ5LVJjk9yW5L7u+fjhvpvTLIryX1Jzp5rXUnS3Mw2LB5NcmGSJd3jQuCbB1H3SuDTVfUy4JXATuAKYFtVrQS2da9JciqwHjgNWAtclWTJQdSWJD1Lsw2Li4CfAb4B7AHeCczppHeSY4A3ANcCVNV3q+pbDK6uuqHrdgNwXre8Dripqp6sqgeAXXhDoCT1arZh8VvAhqqaqKoTGITHb8yx5kuAKeAjSe5O8uEkRwMnVtUegO75hK7/MuDhofV3d20/IsklSSaTTE5NTc1xeJKk6WYbFq+oqr9/+kVVPQa8ao41DwdeDVxdVa8Cvk13yOkZZIa2GW8IrKprqmpVVa2amJjzKRVJ0jSzDYvDpp1wPp7Z39A33W5gd1V9sXt9M4Pw2JvkpG77JwH7hvqfPLT+cuCROdaWJM3BbMPivwKfT/JbSf4T8Hngd+ZSsKq+ATyc5KVd0xrgXgaX5m7o2jYAW7rlrcD6JEckOQVYCdw5l9qSpLmZ7R3cH00yyWDywABvr6p7D6Lue4CPJ3k+8DUGJ8sPAzYnuRh4CDi/q70jyWYGgbIfuLSqnjqI2pKkZ2nWh5K6cDiYgBje1j0MJiOcbs0z9N8EbJqP2pKkZ29OU5RLkhYXw0KS1GRYSJKaDAtJUpNhIUlqMiwkSU2GhSSpybCQJDUZFpKkJsNCktRkWEiSmgwLSVKTYSFJajIsJElNhoUkqcmwkCQ1GRaSpCbDQpLUZFhIkpoMC0lSk2EhSWoyLCRJTYaFJKlpbGGRZEmSu5Pc2r0+PsltSe7vno8b6rsxya4k9yU5e1xjlqTFapx7FpcBO4deXwFsq6qVwLbuNUlOBdYDpwFrgauSLOl5rJK0qI0lLJIsB94KfHioeR1wQ7d8A3DeUPtNVfVkVT0A7AJW9zVWSdL49iw+CPwq8P2hthOrag9A93xC174MeHio3+6uTZLUk97DIsk5wL6qumu2q8zQVs+w7UuSTCaZnJqamvMYJUk/bBx7Fq8Hzk3yIHAT8MYkHwP2
JjkJoHve1/XfDZw8tP5y4JGZNlxV11TVqqpaNTExMarxS9Ki03tYVNXGqlpeVSsYnLj+y6q6ENgKbOi6bQC2dMtbgfVJjkhyCrASuLPnYUvSonb4uAcw5APA5iQXAw8B5wNU1Y4km4F7gf3ApVX11PiGKUmLz1jDoqpuB27vlr8JrHmGfpuATb0NTJL0Q7yDW5LUZFhIkpoMC0lSk2EhSWoyLCRJTYaFJKnJsJAkNRkWkqQmw0KS1GRYSJKaFtLcUJKehXNv3tLudJC2vnPdyGvo0OCehSSpybCQJDUZFpKkJsNCktTkCW5JmoWvXrV3pNt/2S+eONLtHyz3LCRJTYaFJKnJsJAkNRkWkqQmw0KS1GRYSJKaDAtJUpP3Weg54c1bfmHkNT617o9GXkNaqHrfs0hycpK/SrIzyY4kl3Xtxye5Lcn93fNxQ+tsTLIryX1Jzu57zJK02I3jMNR+4Feq6uXAmcClSU4FrgC2VdVKYFv3mu699cBpwFrgqiRLxjBuSVq0ej8MVVV7gD3d8hNJdgLLgHXAWV23G4Dbgfd17TdV1ZPAA0l2AauBO/oduaSnnX/L9pFu/8/ecfpIt69nb6wnuJOsAF4FfBE4sQuSpwPlhK7bMuDhodV2d20zbe+SJJNJJqempkY1bEladMYWFkleCNwCXF5Vjx+o6wxtNVPHqrqmqlZV1aqJiYn5GKYkiTGFRZLnMQiKj1fVJ7rmvUlO6t4/CdjXte8GTh5afTnwSF9jlSSN52qoANcCO6vq94fe2gps6JY3AFuG2tcnOSLJKcBK4M6+xitJGs99Fq8Hfg74SpJ7urZfAz4AbE5yMfAQcD5AVe1Ishm4l8GVVJdW1VP9D1uSFq9xXA31f5j5PATAmmdYZxOwaWSDkiQdkHdwSzpkbL7l0ZHX+Jl3LB15jUORc0NJkpoMC0lSk2EhSWoyLCRJTYaFJKnJsJAkNRkWkqQmw0KS1GRYSJKaDAtJUpPTfYzQ335o3chr/NR7trQ7STpk7f3gaCfZPvHy1bPq556FJKnJsJAkNRkWkqQmw0KS1GRYSJKaDAtJUtOiuHR26uqPjXT7E+++cKTbl6Rxc89CktRkWEiSmgwLSVKTYSFJajIsJElNh0xYJFmb5L4ku5JcMe7xSNJickiERZIlwB8CbwZOBS5Icup4RyVJi8chERbAamBXVX2tqr4L3ASMfv5vSRIAqapxj6EpyTuBtVX1b7vXPwf886r6pWn9LgEu6V6+FLhvjiWXAo/Ocd2DMa6646ztZ14ctRdb3XHWPti6L66qiemNh8od3Jmh7UdSrqquAa456GLJZFWtOtjtHCp1x1nbz7w4ai+2uuOsPaq6h8phqN3AyUOvlwOPjGkskrToHCph8X+BlUlOSfJ8YD2wdcxjkqRF45A4DFVV+5P8EvAZYAlwXVXtGGHJgz6UdYjVHWdtP/PiqL3Y6o6z9kjqHhInuCVJ43WoHIaSJI2RYSFJajIshiS5Lsm+JNt7rntykr9KsjPJjiSX9VT3BUnuTPLlru5v9lF3qP6SJHcnubXnug8m+UqSe5JM9lz72CQ3J/lq99/7tT3UfGn3WZ9+PJ7k8lHX7Wq/t/vZ2p7kxiQv6KNuV/uyru6OUX/emX53JDk+yW1J7u+ej+up7vndZ/5+knm7hNaw+GHXA2vHUHc/8CtV9XLgTODSnqYzeRJ4Y1W9EjgDWJvkzB7qPu0yYGeP9Yb9q6o6YwzXwV8JfLqqXga8kh4+f1Xd133WM4DXAN8BPjnqukmWAb8MrKqq0xlcnLJ+1HW72qcD/47B7A+vBM5JsnKEJa/nR393XAFsq6qVwLbudR91twNvBz43n4UMiyFV9TngsTHU3VNVX+qWn2DwC2RZD3Wrqv6xe/m87tHLFQ9JlgNvBT7cR72FIMkxwBuAawGq6rtV9a2eh7EG+Nuq+npP9Q4HjkxyOHAU/d0f9XLgC1X1naraD/w18LZRFXuG3x3rgBu65RuA8/qo
W1U7q2qus1c8I8NigUmyAngV8MWe6i1Jcg+wD7itqnqpC3wQ+FXg+z3VG1bAZ5Pc1U0R05eXAFPAR7rDbx9OcnSP9WHwl/2NfRSqqr8Dfg94CNgD/ENVfbaP2gz+un5Dkh9PchTwFn74xt4+nFhVe2DwByFwQs/155VhsYAkeSFwC3B5VT3eR82qeqo7PLEcWN3tvo9UknOAfVV116hrPYPXV9WrGcxifGmSN/RU93Dg1cDVVfUq4NuM5tDEjLobWs8F/qynescx+Ov6FOAngKOTXNhH7araCfw2cBvwaeDLDA73ao4MiwUiyfMYBMXHq+oTfdfvDofcTj/nbF4PnJvkQQYzCL8xycd6qAtAVT3SPe9jcOx+dU+ldwO7h/bebmYQHn15M/ClqtrbU703AQ9U1VRVfQ/4BPC6nmpTVddW1aur6g0MDtXc31ftzt4kJwF0z/t6rj+vDIsFIEkYHMfeWVW/32PdiSTHdstHMvif+6ujrltVG6tqeVWtYHBY5C+rqpe/OJMcneRFTy8D/5rBIYuRq6pvAA8neWnXtAa4t4/anQvo6RBU5yHgzCRHdT/ja+jxgoYkJ3TPP8nghG+fnx0GUxJt6JY3AFt6rj+/qspH92Dww7QH+B6DvwIv7qnuv2BwHP1vgHu6x1t6qPsK4O6u7nbgP47h3/ws4NYe672EwSGJLwM7gF/v+fOeAUx2/+Z/DhzXU92jgG8CP9bz5/1NBn+AbAf+FDiix9r/m0EYfxlYM+JaP/K7A/hxBldB3d89H99T3bd1y08Ce4HPzEctp/uQJDV5GEqS1GRYSJKaDAtJUpNhIUlqMiwkSU2GhXQQuhls3zSLfpXkn86xxpzXleaLYSFJajIsJElNhoU0D5KsTnJHkm8l2ZPkv3cT9w17S5KvJXk0ye8mOWxo/Yu6L0P6+ySfSfLinj+CdECGhTQ/ngLeCywFXstgHqRfnNbnbcAqBpMHrgMuAkhyHvBrDOYvmmAwTUXf8xhJB2RYSPOgqu6qqi9U1f6qehD4Y+BfTuv221X1WFU9xOD7PC7o2t8F/JcafGnNfuA/A2e4d6GFxLCQ5kGSf5bk1iTfSPI4g1/4S6d1e3ho+esMvuMB4MXAld0hrG8xmE479PBtidJsGRbS/LiaweyqK6vqGAaHlTKtz/A3tf0kP/iK0YeBd1XVsUOPI6vq8yMftTRLhoU0P14EPA78Y5KXAe+eoc9/SHJckpOBy4D/2bX/EbAxyWkASX4syfl9DFqaLcNCmh//HvhZ4AngT/hBEAzbAtzF4PtK/oLBF15RVZ9k8BWgN3WHsLYz+FY7acHw+ywkSU3uWUiSmgwLSVKTYSFJajIsJElNhoUkqcmwkCQ1GRaSpCbDQpLU9P8BhV6vGIhiV2UAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of how many training rows carry each label value.\n",
    "fig, ax = plt.subplots()\n",
    "\n",
    "# Pass the column by keyword: seaborn 0.11 deprecated positional vectors and\n",
    "# seaborn >= 0.12 accepts only `data` positionally, so a positional vector is\n",
    "# no longer treated as the x variable. The keyword form works on both.\n",
    "sns.countplot(x='label', data=train, ax=ax)\n",
    "ax.set_xlabel('label', fontsize=12)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
